/*

Inputs:
	> tmp/voter_addresses_unique_geocoded.dta [full address file from the 2018 with geocodes]
	> tmp/strassen_`election'_voll_buchstabe [address - precinct assignments for each election]

Output: tmp/voter_id_panel_raw.dta
		[balanced panel of voter-addresses and mapping to precinct (but no polling location yet)]

Main task:
	> merge address geocoordinates from 2018 geolocation
	> identify voter addresses that appear in ALL elections
	> gen voter_id:= string ID of voter address 
	> save election-sepecific files 
	> append all elections

*/
	
*** 1) Create list of addresses that appear in EVERY election
* PULL: geocoded addresses based on 2018 electoral roll	
use "$tmp/voter_addresses_unique_geocoded.dta", clear // 153,938 addresses

* Merge addresses from other elecection and KEEP only addresses that appear in ALL Elections (balanced panel)
	foreach election in "btw17" "ltw18" "euw19" "kow20" "kow14" "ltw13" "btw09" {
		merge 1:1 stadtbez merge_strasse nummer using "$tmp/strassen_`election'_voll_buchstabe", keep(3) nogen keepusing(stadtbez merge_strasse nummer)
	}


*rename and destring
	ren 	g_lat lat_vote
	lab var lat_vote "latitude of voters address"
	ren 	g_lon lon_vote
	lab var lon_vote "longitude of voters address"
	destring  lat_vote lon_vote, replace
	
* Gen Voter-ID														
	gen 	voter_id_str 	= merge_strasse + nummer
	lab var voter_id_str "ID for addresses of eligible voters"

*save
	isid voter_id_str
	tempfile voter_merged_all_geo_final
	save `voter_merged_all_geo_final'

*** 2) Use address-precinct mappings; KEEP balanced addresses and merge geocodes to addresses
foreach election in "btw09" "ltw13" "kow14" "btw17" "ltw18" "euw19" "kow20" {
	
	* PULL: assignment of voter-addresses to precincts
	use "$tmp/strassen_`election'_voll_buchstabe", clear
	gen wahl = upper("`election'")
	
	// DROP addresses that are not in EVERY election + merge geocodes
	merge 1:1 stadtbez merge_strasse nummer using `voter_merged_all_geo_final', nogen keep(3) assert(1 3)
	
	keep stadtbez voter_id_str lat_vote lon_vote sb wahl
	order voter_id_str stadtbez sb lon_vote lat_vote
	
	label var sb "precint (first two digits for the city district)"
	label var stadtbez "city district (1-25)"
	isid voter_id_str
	
	tempfile strassen_`election'_geo
	save `strassen_`election'_geo'
}


*** 3) append into BALANCED panel at voter-address x election
clear 
local j=0
gen wahl_id = .
 /*note: ltw13 and kow14 are duplicates b/c btw13 and euw14 had NO CHANGES to assignments */
foreach election in "btw09" "ltw13" "ltw13" "kow14" "kow14" "btw17" "ltw18" "euw19" "kow20" {  
	append using `strassen_`election'_geo'
	replace wahl_id = `j' if missing(wahl_id)
	replace wahl = "BTW13" if wahl_id==2
	replace wahl = "EUW14" if wahl_id==4
	local ++j
}

 // gen numeric voter_id 
	egen 	voter_id=group(voter_id_str)
	lab var voter_id "Numerical voter-address ID"

 // label election ID (in chronological order)
	lab def wahl_id 0 "BTW09" 1 "LTW13" 2 "BTW13" 3 "KOW14" 4 "EUW14" 5 "BTW17" 6 "LTW18" 7 "EUW19" 8 "KOW20"
	lab val wahl_id wahl_id
	lab var wahl_id "Election ID (num, chronological)"
	lab var wahl 	"Election ID (string)"

	lab var sb 		 "Precinct ID (Stimmbezirk)"
	lab var stadtbez "District ID (Stadtbezirk)"


 // SAVE: balanced panel of addresses x election (no polling locations yet)
	isid voter_id wahl_id
	save "$tmp/voter_id_panel_raw.dta", replace

	